{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e25cd1d1-8c57-42ab-bb2c-4a70617361c4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-10-11T07:36:37.572016Z",
     "iopub.status.busy": "2024-10-11T07:36:37.571650Z",
     "iopub.status.idle": "2024-10-11T07:36:37.576090Z",
     "shell.execute_reply": "2024-10-11T07:36:37.575614Z",
     "shell.execute_reply.started": "2024-10-11T07:36:37.571982Z"
    },
    "tags": []
   },
   "source": [
    "# 第一章 -- 认识LLM\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d93e75a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%capture\n",
    "# !pip install transformers>=4.40.1 accelerate>=0.27.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "7c8f86ef-c637-488f-923a-95fe515a60a1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-10-11T07:56:03.928487Z",
     "iopub.status.busy": "2024-10-11T07:56:03.928036Z",
     "iopub.status.idle": "2024-10-11T07:56:04.366237Z",
     "shell.execute_reply": "2024-10-11T07:56:04.365536Z",
     "shell.execute_reply.started": "2024-10-11T07:56:03.928450Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fri Oct 11 07:56:04 2024       \n",
      "+---------------------------------------------------------------------------------------+\n",
      "| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |\n",
      "|-----------------------------------------+----------------------+----------------------+\n",
      "| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
      "| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n",
      "|                                         |                      |               MIG M. |\n",
      "|=========================================+======================+======================|\n",
      "|   0  NVIDIA GeForce RTX 4090        On  | 00000000:A1:00.0 Off |                  Off |\n",
      "| 30%   30C    P8              17W / 450W |   2854MiB / 24564MiB |      0%      Default |\n",
      "|                                         |                      |                  N/A |\n",
      "+-----------------------------------------+----------------------+----------------------+\n",
      "                                                                                         \n",
      "+---------------------------------------------------------------------------------------+\n",
      "| Processes:                                                                            |\n",
      "|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n",
      "|        ID   ID                                                             Usage      |\n",
      "|=======================================================================================|\n",
      "+---------------------------------------------------------------------------------------+\n"
     ]
    }
   ],
   "source": [
    "# 检查当前机器是可以使用 GPU，并且已经安装了正确的 CUDA 版本\n",
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "92789975-f714-4258-815f-a3383ff1025e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-10-11T07:39:27.551763Z",
     "iopub.status.busy": "2024-10-11T07:39:27.551350Z",
     "iopub.status.idle": "2024-10-11T07:39:27.564323Z",
     "shell.execute_reply": "2024-10-11T07:39:27.563116Z",
     "shell.execute_reply.started": "2024-10-11T07:39:27.551728Z"
    },
    "tags": []
   },
   "source": [
    "## 1.1 Qwen/Qwen2.5-0.5B-Instruct\n",
    "假定用户已经对机器学习（ML） 和深度学习（DL） 有一定的了解， 第一部就是加载一个模型进行推理预测。可以对模型和 tokenizer 进行分别加载."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c75a18db",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"HF_HOME\"] = \"/openbayes/home/huggingface\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fd34a03a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/openbayes/home/huggingface\n"
     ]
    }
   ],
   "source": [
    "!echo $HF_HOME"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "502af603-cffa-4e19-b582-39de47038fbf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-10-11T08:00:58.279143Z",
     "iopub.status.busy": "2024-10-11T08:00:58.278713Z",
     "iopub.status.idle": "2024-10-11T08:00:59.439221Z",
     "shell.execute_reply": "2024-10-11T08:00:59.438712Z",
     "shell.execute_reply.started": "2024-10-11T08:00:58.279109Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Qwen2ForCausalLM(\n",
      "  (model): Qwen2Model(\n",
      "    (embed_tokens): Embedding(151936, 896)\n",
      "    (layers): ModuleList(\n",
      "      (0-23): 24 x Qwen2DecoderLayer(\n",
      "        (self_attn): Qwen2SdpaAttention(\n",
      "          (q_proj): Linear(in_features=896, out_features=896, bias=True)\n",
      "          (k_proj): Linear(in_features=896, out_features=128, bias=True)\n",
      "          (v_proj): Linear(in_features=896, out_features=128, bias=True)\n",
      "          (o_proj): Linear(in_features=896, out_features=896, bias=False)\n",
      "          (rotary_emb): Qwen2RotaryEmbedding()\n",
      "        )\n",
      "        (mlp): Qwen2MLP(\n",
      "          (gate_proj): Linear(in_features=896, out_features=4864, bias=False)\n",
      "          (up_proj): Linear(in_features=896, out_features=4864, bias=False)\n",
      "          (down_proj): Linear(in_features=4864, out_features=896, bias=False)\n",
      "          (act_fn): SiLU()\n",
      "        )\n",
      "        (input_layernorm): Qwen2RMSNorm()\n",
      "        (post_attention_layernorm): Qwen2RMSNorm()\n",
      "      )\n",
      "    )\n",
      "    (norm): Qwen2RMSNorm()\n",
      "  )\n",
      "  (lm_head): Linear(in_features=896, out_features=151936, bias=False)\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
    "\n",
    "# 加载模型和 tokenizer （第一次加载的时候会进行下载）\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    \"Qwen/Qwen2.5-0.5B-Instruct\",\n",
    "    device_map=\"cuda\",\n",
    "    torch_dtype=\"auto\",\n",
    "    trust_remote_code=True,\n",
    ")\n",
    "# 查看 模型结构是什么？\n",
    "print(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "ba34a386-1047-4a7a-adf3-554d53451717",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-10-11T07:54:03.311834Z",
     "iopub.status.busy": "2024-10-11T07:54:03.311382Z",
     "iopub.status.idle": "2024-10-11T07:54:03.782818Z",
     "shell.execute_reply": "2024-10-11T07:54:03.782328Z",
     "shell.execute_reply.started": "2024-10-11T07:54:03.311799Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'eos_token': '<|im_end|>',\n",
       " 'pad_token': '<|endoftext|>',\n",
       " 'additional_special_tokens': ['<|im_start|>',\n",
       "  '<|im_end|>',\n",
       "  '<|object_ref_start|>',\n",
       "  '<|object_ref_end|>',\n",
       "  '<|box_start|>',\n",
       "  '<|box_end|>',\n",
       "  '<|quad_start|>',\n",
       "  '<|quad_end|>',\n",
       "  '<|vision_start|>',\n",
       "  '<|vision_end|>',\n",
       "  '<|vision_pad|>',\n",
       "  '<|image_pad|>',\n",
       "  '<|video_pad|>']}"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 这里有一个 Special token, 打印看一下是什么token呢？\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"Qwen/Qwen2.5-0.5B-Instruct\")\n",
    "# tokenizer.all_special_tokens\n",
    "tokenizer.special_tokens_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "a815b098-0eb7-4959-b234-5f2de29baabd",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-10-11T07:54:59.563955Z",
     "iopub.status.busy": "2024-10-11T07:54:59.563516Z",
     "iopub.status.idle": "2024-10-11T07:54:59.568582Z",
     "shell.execute_reply": "2024-10-11T07:54:59.567943Z",
     "shell.execute_reply.started": "2024-10-11T07:54:59.563920Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 每一个模型的 special token 都是不一样的（但是大同小异）\n",
    "# tokenizer = AutoTokenizer.from_pretrained(\"/openbayes/input/input0\")\n",
    "# tokenizer.special_tokens_map"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7ac2ba87-c5c8-46bb-aa4b-c496f0d42acf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-10-11T07:41:24.660416Z",
     "iopub.status.busy": "2024-10-11T07:41:24.660193Z",
     "iopub.status.idle": "2024-10-11T07:41:25.156935Z",
     "shell.execute_reply": "2024-10-11T07:41:25.155779Z",
     "shell.execute_reply.started": "2024-10-11T07:41:24.660400Z"
    },
    "tags": []
   },
   "source": [
    "## 1.2 基础使用方式\n",
    "加载模型之后可以怎么使用呢？可以直接使用原始的 model 和 tokenizer 进行推理；\n",
    "\n",
    "- step1: 加载模型\n",
    "- step2: 构建 prompt 和 tokenizer\n",
    "- step3: 推理和解码\n",
    "\n",
    "刚刚已经加载过模型了，所以直接进行 step2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "d2650df5-3b05-4ba5-9f43-3bce0c31e9eb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-10-11T08:00:36.492148Z",
     "iopub.status.busy": "2024-10-11T08:00:36.491852Z",
     "iopub.status.idle": "2024-10-11T08:00:36.499648Z",
     "shell.execute_reply": "2024-10-11T08:00:36.499240Z",
     "shell.execute_reply.started": "2024-10-11T08:00:36.492129Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<|im_start|>system\n",
      "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n",
      "<|im_start|>user\n",
      "讲一个猫有关的笑话？<|im_end|>\n",
      "<|im_start|>assistant\n",
      "\n",
      "========================================\n",
      "{'input_ids': tensor([[151644,   8948,    198,   2610,    525,   1207,  16948,     11,   3465,\n",
      "            553,  54364,  14817,     13,   1446,    525,    264,  10950,  17847,\n",
      "             13, 151645,    198, 151644,    872,    198,  99526,  46944, 100472,\n",
      "         101063,   9370, 109959,  11319, 151645,    198, 151644,  77091,    198]],\n",
      "       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
      "         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}\n"
     ]
    }
   ],
   "source": [
    "prompt = \"讲一个猫有关的笑话？\"\n",
    "messages = [\n",
    "    {\"role\": \"system\", \"content\": \"You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\"},\n",
    "    {\"role\": \"user\", \"content\": prompt}\n",
    "]\n",
    "\n",
    "#  想一下诗变成什么格式\n",
    "text = tokenizer.apply_chat_template(\n",
    "    messages,\n",
    "    tokenize=False,\n",
    "    add_generation_prompt=True\n",
    ")\n",
    "model_inputs = tokenizer([text], return_tensors=\"pt\").to(model.device)\n",
    "\n",
    "print(text)\n",
    "print(\"====\" * 10)\n",
    "print(model_inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "304265f3-16c7-4b76-89d1-991bd5628022",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-10-11T08:02:02.184269Z",
     "iopub.status.busy": "2024-10-11T08:02:02.183829Z",
     "iopub.status.idle": "2024-10-11T08:02:03.095914Z",
     "shell.execute_reply": "2024-10-11T08:02:03.095439Z",
     "shell.execute_reply.started": "2024-10-11T08:02:02.184233Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "有一只猫对一只小老鼠说：“你真聪明，能帮我找到妈妈！” 小老鼠想了想，回答道：“是啊，我叫‘小红帽’。” 这只猫听了哈哈大笑起来。\n"
     ]
    }
   ],
   "source": [
    "# step3: 推理和解码\n",
    "generated_ids = model.generate(\n",
    "    **model_inputs,\n",
    "    max_new_tokens=512\n",
    ")\n",
    "generated_ids = [\n",
    "    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n",
    "]\n",
    "\n",
    "response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "35e6c259-989a-4b1e-8f2f-b3d420af24dd",
   "metadata": {},
   "source": [
    "## 1.3 使用 transformers 的 pipeline 简化流程"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "cac58a8f-09e3-43df-9650-52956254c420",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-10-11T08:04:15.400688Z",
     "iopub.status.busy": "2024-10-11T08:04:15.400052Z",
     "iopub.status.idle": "2024-10-11T08:04:19.558456Z",
     "shell.execute_reply": "2024-10-11T08:04:19.557925Z",
     "shell.execute_reply.started": "2024-10-11T08:04:15.400651Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/output/envs/hands-on-llm/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:515: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.7` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
      "  warnings.warn(\n",
      "/output/envs/hands-on-llm/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:520: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.8` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
      "  warnings.warn(\n",
      "/output/envs/hands-on-llm/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:537: UserWarning: `do_sample` is set to `False`. However, `top_k` is set to `20` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_k`.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "好的，以下是一个关于猫的笑话：\n",
      "\n",
      "有一天，一只猫在森林里迷路了。它四处张望，但什么也看不见。突然，它看到了一棵树上挂着一个牌子，上面写着“欢迎光临”。猫好奇地走过去，发现牌子后面有一个小洞。\n",
      "\n",
      "猫小心翼翼地走进洞穴，里面有一只小老鼠正在吃着食物。猫对老鼠说：“你好，我是来自森林的小动物。”老鼠回答道：“我叫米奇，很高兴见到你。”\n",
      "\n",
      "猫对米奇说：“我也很高兴遇见你，但我需要一些食物来养活自己。”米奇笑了笑，说：“那我就给你煮个饭吧，你尝尝看。”\n",
      "\n",
      "于是，猫和米奇一起开始烹饪食物。猫用它的爪子扒开泥土，米奇则用他的牙齿咬碎坚果。他们一边吃一边聊，很快就度过了一个愉快的夜晚。\n",
      "\n",
      "这个故事告诉我们，有时候，我们可能会遇到各种各样的人或事，但只要我们保持耐心、友好，并且乐于分享我们的经历，就有可能结识到新朋友。\n"
     ]
    }
   ],
   "source": [
    "from transformers import pipeline\n",
    "\n",
    "\n",
    "# step1: 生成 pipeline\n",
    "# return_full_text=False 表示只返回新生成的文本，不包含输入的prompt\n",
    "# return_full_text=True 则会返回完整文本，包含输入的prompt和生成的新文本\n",
    "generator = pipeline(\n",
    "    \"text-generation\", \n",
    "    model=model,\n",
    "    tokenizer=tokenizer,\n",
    "    return_full_text=False,  # 只返回新生成的文本部分\n",
    "    max_new_tokens=500,\n",
    "    do_sample=False\n",
    ")\n",
    "\n",
    "# step2: 构建 prompt\n",
    "messages = [\n",
    "    {\"role\": \"user\", \"content\": \"写一个和猫有关的笑话.\"}\n",
    "]\n",
    "\n",
    "# step3，输出并解码\n",
    "output = generator(messages)\n",
    "print(output[0][\"generated_text\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "586b6f58-8623-431e-ad76-afa8de650064",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-10-11T08:04:49.207035Z",
     "iopub.status.busy": "2024-10-11T08:04:49.206594Z",
     "iopub.status.idle": "2024-10-11T08:04:49.213341Z",
     "shell.execute_reply": "2024-10-11T08:04:49.212867Z",
     "shell.execute_reply.started": "2024-10-11T08:04:49.207001Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'generated_text': '好的，以下是一个关于猫的笑话：\\n\\n有一天，一只猫在森林里迷路了。它四处张望，但什么也看不见。突然，它看到了一棵树上挂着一个牌子，上面写着“欢迎光临”。猫好奇地走过去，发现牌子后面有一个小洞。\\n\\n猫小心翼翼地走进洞穴，里面有一只小老鼠正在吃着食物。猫对老鼠说：“你好，我是来自森林的小动物。”老鼠回答道：“我叫米奇，很高兴见到你。”\\n\\n猫对米奇说：“我也很高兴遇见你，但我需要一些食物来养活自己。”米奇笑了笑，说：“那我就给你煮个饭吧，你尝尝看。”\\n\\n于是，猫和米奇一起开始烹饪食物。猫用它的爪子扒开泥土，米奇则用他的牙齿咬碎坚果。他们一边吃一边聊，很快就度过了一个愉快的夜晚。\\n\\n这个故事告诉我们，有时候，我们可能会遇到各种各样的人或事，但只要我们保持耐心、友好，并且乐于分享我们的经历，就有可能结识到新朋友。'}]"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "output"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "hands-on-llm",
   "language": "python",
   "name": "hands-on-llm"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
